這是一個關於澳門第三次全民核酸的初步分析,核酸檢測時間為2021年10月4日9pm -7日9pm。
因第二次全民核酸已得顯著改善,故第三次核酸的安排鼓勵縮短整體時間至兩日內,而第三日僅開放部分八個檢測點以延續正常檢測工作。
這裡的分析著重看前兩日內的各站點壓力表現,每檢測點平均半小時負荷量均控制在36以下,和上次持平,表現頗為理想。
本次數據來源分別是
Station-image-tag
PreventCovid-image-tag
filelist <- dir('../../aptmon-scraping/20211004', full.names=TRUE)
scrp <- data.frame()
for (file in filelist) {
filetimestr <- sub(".csv", "",sub(".*-", "", file))
filetime <- strptime(filetimestr,"%Y%m%d%H%M%S")
temp <- read.csv(file, na = "---")
temp$DateTime <- as.POSIXlt(filetime)
scrp <- rbind(scrp,as.data.frame(temp))
}
attr(scrp$DateTime, "tzone") <- "GMT"
scrp$DateTimeRound <- round(scrp$DateTime, "30mins")
scrp$WaitingQueue <- as.numeric(as.character(sub("*人", "", scrp$輪候人數)))
scrp$WaitingMinutes <- as.numeric(as.character(sub("分鐘", "",sub(".*>", "", scrp$等候時間))))
scrp$StationCount <- rowSums(scrp[ ,c("口採樣點", "鼻採樣點")], na.rm=TRUE)
scrp$HourNumber <-
sapply(strsplit(substr(scrp$DateTimeRound,12,16),":"),
function(x) {
x <- as.numeric(x)
x[1]+x[2]/60
}
)
str(scrp)
## 'data.frame': 12113 obs. of 15 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ 序號 : chr "A01" "A02" "A03" "A04" ...
## $ 地點 : chr "街坊會聯合總會社區服務大樓街坊會聯合總會社區服務大樓口採樣點: 2鼻採樣點: 3輪候人數:175人等候時間:<U+2B24>35分鐘"| __truncated__ "慈幼中學慈幼中學口採樣點: 1鼻採樣點: 1輪候人數:228人等候時間:<U+2B24>114分鐘類別: (A) 關愛專站(無需預約)" "望廈體育中心三樓望廈體育中心三樓口採樣點: 3鼻採樣點: 2輪候人數:25人等候時間:<U+2B24>5分鐘類別: (A) 關愛專站(無需預約)" "塔石體育館B館塔石體育館B館口採樣點: 2鼻採樣點: 2輪候人數:30人等候時間:<U+2B24>8分鐘類別: (A) 關愛專站(無需預約)" ...
## $ 口採樣點 : int 2 1 3 2 2 1 3 4 2 3 ...
## $ 鼻採樣點 : int 3 1 2 2 1 1 2 2 2 2 ...
## $ 輪候人數 : chr "175人" "228人" "25人" "30人" ...
## $ 等候時間 : chr "<U+2B24>35分鐘" "<U+2B24>114分鐘" "<U+2B24>5分鐘" "<U+2B24>8分鐘" ...
## $ 類別 : chr "A" "A" "A" "A" ...
## $ Location : chr "街坊會聯合總會社區服務大樓" "慈幼中學" "望廈體育中心三樓" "塔石體育館B館" ...
## $ DateTime : POSIXlt, format: "2021-10-04 21:21:48" "2021-10-04 21:21:48" ...
## $ DateTimeRound : POSIXct, format: "2021-10-04 21:30:00" "2021-10-04 21:30:00" ...
## $ WaitingQueue : num 175 228 25 30 260 38 120 160 120 130 ...
## $ WaitingMinutes: num 35 114 5 8 87 19 24 27 30 26 ...
## $ StationCount : num 5 2 5 4 3 2 5 6 4 5 ...
## $ HourNumber : num 21.5 21.5 21.5 21.5 21.5 21.5 21.5 21.5 21.5 21.5 ...
filelist2 <- dir('../../RNA010/20211004', full.names=TRUE)
file2 <- tail(filelist2, 1)
sheets <- c("20211004A","20211005A","20211006A","20211007A")
df <- data.frame()
for (sheetname in sheets) {
#sheetname <- "20211004A"
### Extract first row for location list
cnames <- read_excel(file2, sheet = sheetname, n_max = 0, na = "---") %>% names()
lls1 <- sub(".*?-", "",cnames[seq(6, length(cnames), 3)])
### Extract data from 2nd row
rdf1 <- read_excel(file2, sheet=sheetname, na = "---", skip = ifelse(sheetname == "20211004A", 2, 1)) # skip 2 because there exists a hidden row 1 in this spreadsheet
### Set date
rdf1$TestDate <- as.Date(strptime(str_remove(sheetname, "A"),"%Y%m%d"))
rdf1$TestTime <- substr(rdf1$預約時段,1,5)
### select columns and rows
sdf1 <- rdf1 %>% select(c(6:ncol(rdf1))) %>% slice(2:nrow(rdf1)) %>% select(-contains("總人次"))
### Repeat Location info for number of rows
Location <- rep(lls1, each = nrow(sdf1) * 2)
### Melt to pivot
sdf1 <- as.data.frame(sdf1)
mdf1 <- reshape::melt(sdf1, id = c("TestDate", "TestTime"))
### Combine Location with dataset
df1 <- cbind(Location,mdf1)
### Clean away column names with ...
df1$variable <- sub("\\....*", "", df1$variable)
df <- rbind(df,as.data.frame(df1))
}
pdf <- df %>% pivot_wider(names_from = variable, values_from = value)
pdf <- as.data.frame(pdf)
pdf$TestCount <- rowSums(pdf[ ,c("口咽拭", "鼻咽拭")], na.rm=TRUE)
pdf$DateTimeRound <- as.POSIXlt(paste(pdf$TestDate, pdf$TestTime))
attr(pdf$DateTimeRound, "tzone") <- "GMT"
pdf$HourNumber <-
sapply(strsplit(pdf$TestTime,":"),
function(x) {
x <- as.numeric(x)
x[1]+x[2]/60
}
)
str(pdf)
## 'data.frame': 4608 obs. of 8 variables:
## $ Location : chr "聖若瑟教區中學第六校" "聖若瑟教區中學第六校" "聖若瑟教區中學第六校" "聖若瑟教區中學第六校" ...
## $ TestDate : Date, format: "2021-10-04" "2021-10-04" ...
## $ TestTime : chr "21:00" "21:30" "22:00" "22:30" ...
## $ 口咽拭 : num 110 92 92 102 113 124 77 76 69 60 ...
## $ 鼻咽拭 : num 56 57 64 65 53 41 35 36 42 44 ...
## $ TestCount : num 166 149 156 167 166 165 112 112 111 104 ...
## $ DateTimeRound: POSIXlt, format: "2021-10-04 21:00:00" "2021-10-04 21:30:00" ...
## $ HourNumber : num 21 21.5 22 22.5 23 23.5 21 21.5 22 22.5 ...
LonLat <- unique(scrp[c("Location")])
LonLat$MapLoc <- ifelse(LonLat$Location == "科大體育館","Macao, 澳門科技大學室內體育館 Gymnasium",LonLat$Location)
LonLat[grep(".*工人體育場", LonLat$Location, perl=T), ]$MapLoc <- "Macao, 工人體育場館"
LonLat[grep(".*1樓", LonLat$Location, perl=T), ]$MapLoc <- "望廈體育中心 Centro Desportivo Mong-Há"
LonLat <- mutate_geocode(LonLat, MapLoc)
LonLat$area <- ifelse(LonLat$lat>=22.17,'macao','cotai,macao')
str(LonLat)
## 'data.frame': 43 obs. of 5 variables:
## $ Location: chr "街坊會聯合總會社區服務大樓" "慈幼中學" "望廈體育中心三樓" "塔石體育館B館" ...
## $ MapLoc : chr "街坊會聯合總會社區服務大樓" "慈幼中學" "望廈體育中心三樓" "塔石體育館B館" ...
## $ lon : num 114 114 114 114 114 ...
## $ lat : num 22.2 22.2 22.2 22.2 22.2 ...
## $ area : chr "macao" "macao" "macao" "macao" ...
station <- scrp %>% group_by(序號,Location,類別,DateTimeRound,HourNumber) %>%
summarise(
StationCount.mean = mean(StationCount, na.rm = TRUE),
StationCount.median = median(StationCount, na.rm = TRUE),
口採樣點.mean = mean(口採樣點, na.rm = TRUE),
鼻採樣點.mean = mean(鼻採樣點, na.rm = TRUE),
口採樣點.median = median(口採樣點, na.rm = TRUE),
鼻採樣點.median = median(鼻採樣點, na.rm = TRUE),
WaitingQueue.mean = mean(WaitingQueue, na.rm = TRUE),
WaitingMinutes.mean = mean(WaitingMinutes, na.rm = TRUE),
WaitingQueue.median = median(WaitingQueue, na.rm = TRUE),
WaitingMinutes.median = median(WaitingMinutes, na.rm = TRUE),
) %>% as.data.frame()
ldf <- merge(LonLat, pdf)
mdf <- merge(station, pdf, by = c("Location","DateTimeRound","HourNumber"))
mdf <- mdf %>%
mutate(TestPerStation = mdf$TestCount / ifelse(is.na(mdf$StationCount.mean) | mdf$StationCount.mean == 0, 1, mdf$StationCount.mean),
TestPerStation.ntile = ntile(TestPerStation, 4),
MouthPerStation = mdf$口咽拭 / ifelse(is.na(mdf$口採樣點.mean) | mdf$口採樣點.mean == 0 , 1, mdf$口採樣點.mean),
NosePerStation = mdf$鼻咽拭 / ifelse(is.na(mdf$鼻採樣點.mean) | mdf$鼻採樣點.mean == 0, 1, mdf$鼻採樣點.mean))
lldf <- merge(LonLat, mdf)
sum(pdf$TestCount,na.rm = TRUE)
## [1] 627865
p1 <- df %>% group_by(variable) %>% summarise(value.sum = sum(value, na.rm = TRUE))
g1 <- ggplot(data=p1, aes(x = variable, y = value.sum, fill = variable, alpha = .9)) +
geom_bar(stat="identity") + coord_flip() +
scale_color_viridis_d(option = 'magma') + scale_fill_viridis_d(option = 'magma') +
theme_minimal() +
ggtitle("Total RNA test by sampling method") + xlab("Sampling method") + ylab("Total bookings")
p2 <- ldf %>% group_by(area) %>% tally(TestCount)
g2 <- ggplot(data=p2, aes(x = area, y = n, fill = area, alpha = .9)) +
geom_bar(stat="identity") + coord_flip() +
scale_color_viridis_d(option = 'magma') + scale_fill_viridis_d(option = 'magma') +
theme_minimal() +
ggtitle("Total RNA test by area") + xlab("Macao / Cotai") + ylab("Total bookings")
grid.arrange(g1,g2,ncol=1)
p3 <- pdf %>% group_by(Location, TestDate) %>% tally(TestCount)
ggplot(data=p3, mapping = aes(x = reorder(Location,n), n, fill = factor(TestDate), alpha = .9)) +
geom_bar(stat = "identity") + coord_flip() +
scale_color_viridis_d(option = 'magma') + scale_fill_viridis_d(option = 'magma') +
theme_minimal() +
ggtitle("Total RNA test by location") + xlab("Location") + ylab("Total bookings")
tilevalue <- c(max(filter(lldf, TestPerStation.ntile == 1)$TestPerStation),
max(filter(lldf, TestPerStation.ntile == 2)$TestPerStation),
max(filter(lldf, TestPerStation.ntile == 3)$TestPerStation),
max(filter(lldf, TestPerStation.ntile == 4)$TestPerStation))
tilevalue
## [1] 26.53846 33.48000 42.00000 122.37500
fw1 <- ggplot(lldf) + geom_point(aes(x = StationCount.mean, y = TestCount, color = factor(TestDate), alpha = .7)) +
scale_color_viridis_d(labels = sort(unique(lldf$TestDate)), option = 'magma', alpha = .7) +
theme_minimal() + facet_wrap(~TestPerStation.ntile, nrow = 4)
#guides(color = FALSE, alpha = FALSE)
fw2 <- ggplot(lldf, aes(x = HourNumber, color = factor(TestDate), fill = factor(TestDate))) +
geom_histogram(binwidth = 1, alpha = .5) +
geom_hline(linetype = "dotted", yintercept = 45, color = "#fc8961") +
scale_color_viridis_d(option = 'magma') + scale_fill_viridis_d(option = 'magma') +
facet_wrap(~TestPerStation.ntile, nrow = 4) +
theme_minimal() #+ theme(legend.position = "none")
grid.arrange(fw1,fw2,ncol=2)
p4 <- lldf %>% filter(TestPerStation.ntile == 4 & TestDate == '2021-10-05')
plot <- ggmap(get_map(location = "taipa, macao", zoom = 12), darken = .5,
base_layer = ggplot(data = p4, aes(x = lon, y = lat, frame = HourNumber, ids = Location))) +
geom_point(data = p4, aes(color = TestCount, size = TestCount, alpha = .5)) +
scale_size(range = c(0, 12)) +
scale_color_viridis_c(option = "magma")
ggplotly(plot)
ggpairs(lldf[c("WaitingQueue.mean","WaitingMinutes.mean","TestCount","StationCount.mean")], aes(color = factor(lldf$TestPerStation.ntile), alpha = .3))